In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [15]:
import os
In [16]:
os.chdir('C:\\MBA\\3rd SEM\\applicationofsoftware\\GDP')
In [17]:
df=pd.read_csv("C:\\MBA\\3rd SEM\\applicationofsoftware\\GDP\\part1.csv")
In [18]:
df
Out[18]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Telangana Tripura Uttar Pradesh Uttarakhand West Bengal1 Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.00 11063.00 143175.00 247144.00 158074.00 42367.00 615606.00 297539.00 ... 359433.00 19208.00 724049.00 115523.00 NaN 3979.00 18768.00 343767.00 16818.00 8736039.00
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.00 12547.00 156864.00 282368.00 177511.00 38120.00 724495.00 347032.00 ... 401493.00 21663.00 822903.00 131835.00 NaN 4421.00 21609.00 391238.00 18875.00 9946636.00
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.00 14602.00 177745.00 317101.00 206690.00 35921.00 807623.00 400662.00 ... 452186.00 25593.00 944146.00 149817.00 NaN 5159.00 24787.00 443783.00 21870.00 11236635.00
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.00 16761.00 198098.00 373920.00 234982.00 40633.00 895027.00 437462.00 ... 511178.00 29667.00 1043371.00 161985.00 NaN 5721.00 27844.00 492424.00 24089.00 12433749.00
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.00 18784.00 224234.00 413503.00 260776.00 45002.00 994316.00 485184.00 ... 575631.00 NaN 1153795.00 184091.00 NaN NaN 30304.00 551963.00 26533.00 13675331.00
5 GSDP - CURRENT PRICES (` in Crore) 2016-17 699307.00 NaN NaN NaN 290140.00 NaN NaN 547396.00 ... 654294.00 NaN NaN NaN NaN NaN NaN 622385.00 29557.00 15251028.00
6 (% Growth over previous year) 2012-13 8.43 13.41 9.56 14.25 12.30 -10.02 17.69 16.63 ... 11.70 12.78 13.65 14.12 NaN 11.13 15.14 13.81 12.23 13.86
7 (% Growth over previous year) 2013-14 12.85 16.38 13.31 12.30 16.44 -5.77 11.47 15.45 ... 12.63 18.14 14.73 13.64 NaN 16.68 14.71 13.43 15.87 12.97
8 (% Growth over previous year) 2014-15 13.40 14.79 11.45 17.92 13.69 13.12 10.82 9.18 ... 13.05 15.92 10.51 8.12 NaN 10.89 12.33 10.96 10.14 10.65
9 (% Growth over previous year) 2015-16 15.85 12.07 13.19 10.59 10.98 10.75 11.09 10.91 ... 12.61 NaN 10.58 13.65 NaN NaN 8.84 12.09 10.15 9.99
10 (% Growth over previous year) 2016-17 14.65 NaN NaN NaN 11.26 NaN NaN 12.82 ... 13.67 NaN NaN NaN NaN NaN NaN 12.76 11.40 11.52

11 rows × 36 columns

In [19]:
df.head()
Out[19]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Telangana Tripura Uttar Pradesh Uttarakhand West Bengal1 Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.0 11063.0 143175.0 247144.0 158074.0 42367.0 615606.0 297539.0 ... 359433.0 19208.0 724049.0 115523.0 NaN 3979.0 18768.0 343767.0 16818.0 8736039.0
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.0 12547.0 156864.0 282368.0 177511.0 38120.0 724495.0 347032.0 ... 401493.0 21663.0 822903.0 131835.0 NaN 4421.0 21609.0 391238.0 18875.0 9946636.0
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.0 14602.0 177745.0 317101.0 206690.0 35921.0 807623.0 400662.0 ... 452186.0 25593.0 944146.0 149817.0 NaN 5159.0 24787.0 443783.0 21870.0 11236635.0
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.0 16761.0 198098.0 373920.0 234982.0 40633.0 895027.0 437462.0 ... 511178.0 29667.0 1043371.0 161985.0 NaN 5721.0 27844.0 492424.0 24089.0 12433749.0
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.0 18784.0 224234.0 413503.0 260776.0 45002.0 994316.0 485184.0 ... 575631.0 NaN 1153795.0 184091.0 NaN NaN 30304.0 551963.0 26533.0 13675331.0

5 rows × 36 columns

In [20]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Items  Description         11 non-null     object 
 1   Duration                   11 non-null     object 
 2   Andhra Pradesh             11 non-null     float64
 3   Arunachal Pradesh          9 non-null      float64
 4   Assam                      9 non-null      float64
 5   Bihar                      9 non-null      float64
 6   Chhattisgarh               11 non-null     float64
 7   Goa                        9 non-null      float64
 8   Gujarat                    9 non-null      float64
 9   Haryana                    11 non-null     float64
 10  Himachal Pradesh           7 non-null      float64
 11  Jammu & Kashmir            9 non-null      float64
 12  Jharkhand                  9 non-null      float64
 13  Karnataka                  9 non-null      float64
 14  Kerala                     9 non-null      float64
 15  Madhya Pradesh             11 non-null     float64
 16  Maharashtra                7 non-null      float64
 17  Manipur                    7 non-null      float64
 18  Meghalaya                  11 non-null     float64
 19  Mizoram                    7 non-null      float64
 20  Nagaland                   7 non-null      float64
 21  Odisha                     11 non-null     float64
 22  Punjab                     7 non-null      float64
 23  Rajasthan                  7 non-null      float64
 24  Sikkim                     9 non-null      float64
 25  Tamil Nadu                 11 non-null     float64
 26  Telangana                  11 non-null     float64
 27  Tripura                    7 non-null      float64
 28  Uttar Pradesh              9 non-null      float64
 29  Uttarakhand                9 non-null      float64
 30  West Bengal1               0 non-null      float64
 31  Andaman & Nicobar Islands  7 non-null      float64
 32  Chandigarh                 9 non-null      float64
 33  Delhi                      11 non-null     float64
 34  Puducherry                 11 non-null     float64
 35  All_India GDP              11 non-null     float64
dtypes: float64(34), object(2)
memory usage: 3.2+ KB
In [21]:
df.columns
Out[21]:
Index(['Items  Description', 'Duration', 'Andhra Pradesh ',
       'Arunachal Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Goa', 'Gujarat',
       'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir', 'Jharkhand',
       'Karnataka', 'Kerala', 'Madhya Pradesh', 'Maharashtra', 'Manipur',
       'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Punjab', 'Rajasthan',
       'Sikkim', 'Tamil Nadu', 'Telangana', 'Tripura', 'Uttar Pradesh',
       'Uttarakhand', 'West Bengal1', 'Andaman & Nicobar Islands',
       'Chandigarh', 'Delhi', 'Puducherry', 'All_India GDP'],
      dtype='object')
In [22]:
df1 = df[df['Duration'] != '2016-17']
df1
Out[22]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Telangana Tripura Uttar Pradesh Uttarakhand West Bengal1 Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.00 11063.00 143175.00 247144.00 158074.00 42367.00 615606.00 297539.00 ... 359433.00 19208.00 724049.00 115523.00 NaN 3979.00 18768.00 343767.00 16818.00 8736039.00
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.00 12547.00 156864.00 282368.00 177511.00 38120.00 724495.00 347032.00 ... 401493.00 21663.00 822903.00 131835.00 NaN 4421.00 21609.00 391238.00 18875.00 9946636.00
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.00 14602.00 177745.00 317101.00 206690.00 35921.00 807623.00 400662.00 ... 452186.00 25593.00 944146.00 149817.00 NaN 5159.00 24787.00 443783.00 21870.00 11236635.00
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.00 16761.00 198098.00 373920.00 234982.00 40633.00 895027.00 437462.00 ... 511178.00 29667.00 1043371.00 161985.00 NaN 5721.00 27844.00 492424.00 24089.00 12433749.00
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.00 18784.00 224234.00 413503.00 260776.00 45002.00 994316.00 485184.00 ... 575631.00 NaN 1153795.00 184091.00 NaN NaN 30304.00 551963.00 26533.00 13675331.00
6 (% Growth over previous year) 2012-13 8.43 13.41 9.56 14.25 12.30 -10.02 17.69 16.63 ... 11.70 12.78 13.65 14.12 NaN 11.13 15.14 13.81 12.23 13.86
7 (% Growth over previous year) 2013-14 12.85 16.38 13.31 12.30 16.44 -5.77 11.47 15.45 ... 12.63 18.14 14.73 13.64 NaN 16.68 14.71 13.43 15.87 12.97
8 (% Growth over previous year) 2014-15 13.40 14.79 11.45 17.92 13.69 13.12 10.82 9.18 ... 13.05 15.92 10.51 8.12 NaN 10.89 12.33 10.96 10.14 10.65
9 (% Growth over previous year) 2015-16 15.85 12.07 13.19 10.59 10.98 10.75 11.09 10.91 ... 12.61 NaN 10.58 13.65 NaN NaN 8.84 12.09 10.15 9.99

9 rows × 36 columns

In [23]:
df1.isnull().sum()
Out[23]:
Items  Description           0
Duration                     0
Andhra Pradesh               0
Arunachal Pradesh            0
Assam                        0
Bihar                        0
Chhattisgarh                 0
Goa                          0
Gujarat                      0
Haryana                      0
Himachal Pradesh             2
Jammu & Kashmir              0
Jharkhand                    0
Karnataka                    0
Kerala                       0
Madhya Pradesh               0
Maharashtra                  2
Manipur                      2
Meghalaya                    0
Mizoram                      2
Nagaland                     2
Odisha                       0
Punjab                       2
Rajasthan                    2
Sikkim                       0
Tamil Nadu                   0
Telangana                    0
Tripura                      2
Uttar Pradesh                0
Uttarakhand                  0
West Bengal1                 9
Andaman & Nicobar Islands    2
Chandigarh                   0
Delhi                        0
Puducherry                   0
All_India GDP                0
dtype: int64
In [24]:
df1.isnull().all(axis=0)
Out[24]:
Items  Description           False
Duration                     False
Andhra Pradesh               False
Arunachal Pradesh            False
Assam                        False
Bihar                        False
Chhattisgarh                 False
Goa                          False
Gujarat                      False
Haryana                      False
Himachal Pradesh             False
Jammu & Kashmir              False
Jharkhand                    False
Karnataka                    False
Kerala                       False
Madhya Pradesh               False
Maharashtra                  False
Manipur                      False
Meghalaya                    False
Mizoram                      False
Nagaland                     False
Odisha                       False
Punjab                       False
Rajasthan                    False
Sikkim                       False
Tamil Nadu                   False
Telangana                    False
Tripura                      False
Uttar Pradesh                False
Uttarakhand                  False
West Bengal1                  True
Andaman & Nicobar Islands    False
Chandigarh                   False
Delhi                        False
Puducherry                   False
All_India GDP                False
dtype: bool
In [25]:
# removing West Bengal as the whole column is NAN
df1 = df1.drop('West Bengal1', axis = 1)
In [26]:
df1
Out[26]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Tamil Nadu Telangana Tripura Uttar Pradesh Uttarakhand Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.00 11063.00 143175.00 247144.00 158074.00 42367.00 615606.00 297539.00 ... 751485.00 359433.00 19208.00 724049.00 115523.00 3979.00 18768.00 343767.00 16818.00 8736039.00
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.00 12547.00 156864.00 282368.00 177511.00 38120.00 724495.00 347032.00 ... 855481.00 401493.00 21663.00 822903.00 131835.00 4421.00 21609.00 391238.00 18875.00 9946636.00
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.00 14602.00 177745.00 317101.00 206690.00 35921.00 807623.00 400662.00 ... 971090.00 452186.00 25593.00 944146.00 149817.00 5159.00 24787.00 443783.00 21870.00 11236635.00
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.00 16761.00 198098.00 373920.00 234982.00 40633.00 895027.00 437462.00 ... 1092564.00 511178.00 29667.00 1043371.00 161985.00 5721.00 27844.00 492424.00 24089.00 12433749.00
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.00 18784.00 224234.00 413503.00 260776.00 45002.00 994316.00 485184.00 ... 1212668.00 575631.00 NaN 1153795.00 184091.00 NaN 30304.00 551963.00 26533.00 13675331.00
6 (% Growth over previous year) 2012-13 8.43 13.41 9.56 14.25 12.30 -10.02 17.69 16.63 ... 13.84 11.70 12.78 13.65 14.12 11.13 15.14 13.81 12.23 13.86
7 (% Growth over previous year) 2013-14 12.85 16.38 13.31 12.30 16.44 -5.77 11.47 15.45 ... 13.51 12.63 18.14 14.73 13.64 16.68 14.71 13.43 15.87 12.97
8 (% Growth over previous year) 2014-15 13.40 14.79 11.45 17.92 13.69 13.12 10.82 9.18 ... 12.51 13.05 15.92 10.51 8.12 10.89 12.33 10.96 10.14 10.65
9 (% Growth over previous year) 2015-16 15.85 12.07 13.19 10.59 10.98 10.75 11.09 10.91 ... 10.99 12.61 NaN 10.58 13.65 NaN 8.84 12.09 10.15 9.99

9 rows × 35 columns

In [27]:
df1.iloc[6:].isnull().sum() 
# since there are at max. only 1 missing value we can take the average of the other two numbers
Out[27]:
Items  Description           0
Duration                     0
Andhra Pradesh               0
Arunachal Pradesh            0
Assam                        0
Bihar                        0
Chhattisgarh                 0
Goa                          0
Gujarat                      0
Haryana                      0
Himachal Pradesh             1
Jammu & Kashmir              0
Jharkhand                    0
Karnataka                    0
Kerala                       0
Madhya Pradesh               0
Maharashtra                  1
Manipur                      1
Meghalaya                    0
Mizoram                      1
Nagaland                     1
Odisha                       0
Punjab                       1
Rajasthan                    1
Sikkim                       0
Tamil Nadu                   0
Telangana                    0
Tripura                      1
Uttar Pradesh                0
Uttarakhand                  0
Andaman & Nicobar Islands    1
Chandigarh                   0
Delhi                        0
Puducherry                   0
All_India GDP                0
dtype: int64
In [28]:
avg_growth = df1.iloc[6:]
In [29]:
avg_growth #dataframe to find the average growth of states
Out[29]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Tamil Nadu Telangana Tripura Uttar Pradesh Uttarakhand Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
7 (% Growth over previous year) 2013-14 12.85 16.38 13.31 12.30 16.44 -5.77 11.47 15.45 ... 13.51 12.63 18.14 14.73 13.64 16.68 14.71 13.43 15.87 12.97
8 (% Growth over previous year) 2014-15 13.40 14.79 11.45 17.92 13.69 13.12 10.82 9.18 ... 12.51 13.05 15.92 10.51 8.12 10.89 12.33 10.96 10.14 10.65
9 (% Growth over previous year) 2015-16 15.85 12.07 13.19 10.59 10.98 10.75 11.09 10.91 ... 10.99 12.61 NaN 10.58 13.65 NaN 8.84 12.09 10.15 9.99

3 rows × 35 columns

In [30]:
avg_growth.columns
Out[30]:
Index(['Items  Description', 'Duration', 'Andhra Pradesh ',
       'Arunachal Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Goa', 'Gujarat',
       'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir', 'Jharkhand',
       'Karnataka', 'Kerala', 'Madhya Pradesh', 'Maharashtra', 'Manipur',
       'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Punjab', 'Rajasthan',
       'Sikkim', 'Tamil Nadu', 'Telangana', 'Tripura', 'Uttar Pradesh',
       'Uttarakhand', 'Andaman & Nicobar Islands', 'Chandigarh', 'Delhi',
       'Puducherry', 'All_India GDP'],
      dtype='object')
In [31]:
avg_growth.count
Out[31]:
<bound method DataFrame.count of               Items  Description Duration  Andhra Pradesh   Arunachal Pradesh  \
7  (% Growth over previous year)  2013-14            12.85              16.38   
8  (% Growth over previous year)  2014-15            13.40              14.79   
9  (% Growth over previous year)  2015-16            15.85              12.07   

   Assam  Bihar  Chhattisgarh    Goa  Gujarat  Haryana  ...  Tamil Nadu  \
7  13.31  12.30         16.44  -5.77    11.47    15.45  ...       13.51   
8  11.45  17.92         13.69  13.12    10.82     9.18  ...       12.51   
9  13.19  10.59         10.98  10.75    11.09    10.91  ...       10.99   

   Telangana  Tripura  Uttar Pradesh  Uttarakhand  Andaman & Nicobar Islands  \
7      12.63    18.14          14.73        13.64                      16.68   
8      13.05    15.92          10.51         8.12                      10.89   
9      12.61      NaN          10.58        13.65                        NaN   

   Chandigarh  Delhi  Puducherry  All_India GDP  
7       14.71  13.43       15.87          12.97  
8       12.33  10.96       10.14          10.65  
9        8.84  12.09       10.15           9.99  

[3 rows x 35 columns]>
In [32]:
# Taking only the values for the states
average_growth_values = avg_growth[avg_growth.columns[2:34]].mean()
In [33]:
# Sorting the average growth rate values and then making a dataframe for all the states
average_growth_values = average_growth_values.sort_values()
average_growth_rate = average_growth_values.to_frame(name='Average growth rate')
average_growth_rate
Out[33]:
Average growth rate
Goa 6.033333
Meghalaya 6.953333
Odisha 9.836667
Sikkim 10.486667
Jammu & Kashmir 10.900000
Gujarat 11.126667
Punjab 11.185000
Maharashtra 11.260000
Rajasthan 11.320000
Jharkhand 11.500000
Uttarakhand 11.803333
Haryana 11.846667
Uttar Pradesh 11.940000
Chandigarh 11.960000
Puducherry 12.053333
Delhi 12.160000
Himachal Pradesh 12.280000
Tamil Nadu 12.336667
Kerala 12.583333
Madhya Pradesh 12.626667
Assam 12.650000
Telangana 12.763333
Bihar 13.603333
Chhattisgarh 13.703333
Andaman & Nicobar Islands 13.785000
Andhra Pradesh 14.033333
Karnataka 14.120000
Arunachal Pradesh 14.413333
Manipur 14.610000
Nagaland 16.415000
Tripura 17.030000
Mizoram 17.700000
In [34]:
# plotting the average growth rate for all the states
plt.figure(figsize=(12,10), dpi = 300)

sns.barplot(x = average_growth_rate['Average growth rate'], y = average_growth_values.index,palette='viridis')
plt.xlabel('Average Growth Rate', fontsize=12)
plt.ylabel('States', fontsize=12)
plt.title('Average Growth Rate for all the states',fontsize=13)
plt.show()
In [35]:
# top 5 states as per average growth rate

average_growth_rate['Average growth rate'][-5:]
Out[35]:
Arunachal Pradesh    14.413333
Manipur              14.610000
Nagaland             16.415000
Tripura              17.030000
Mizoram              17.700000
Name: Average growth rate, dtype: float64
In [36]:
# top 5 states as per average growth rate for the years 2013-14, 2014-15, 2015-16

avg_growth[['Mizoram','Tripura','Nagaland','Manipur','Arunachal Pradesh']]
Out[36]:
Mizoram Tripura Nagaland Manipur Arunachal Pradesh
7 23.1 18.14 21.98 17.83 16.38
8 12.3 15.92 10.85 11.39 14.79
9 NaN NaN NaN NaN 12.07
In [37]:
#create a dataframe to store the mean and the standard deviation of the growth rate for various states

describe = pd.DataFrame(avg_growth.describe())
describe = describe.T
describe
Out[37]:
count mean std min 25% 50% 75% max
Andhra Pradesh 3.0 14.033333 1.597133 12.85 13.1250 13.400 14.6250 15.85
Arunachal Pradesh 3.0 14.413333 2.179549 12.07 13.4300 14.790 15.5850 16.38
Assam 3.0 12.650000 1.040961 11.45 12.3200 13.190 13.2500 13.31
Bihar 3.0 13.603333 3.834871 10.59 11.4450 12.300 15.1100 17.92
Chhattisgarh 3.0 13.703333 2.730024 10.98 12.3350 13.690 15.0650 16.44
Goa 3.0 6.033333 10.290444 -5.77 2.4900 10.750 11.9350 13.12
Gujarat 3.0 11.126667 0.326548 10.82 10.9550 11.090 11.2800 11.47
Haryana 3.0 11.846667 3.238245 9.18 10.0450 10.910 13.1800 15.45
Himachal Pradesh 2.0 12.280000 3.026417 10.14 11.2100 12.280 13.3500 14.42
Jammu & Kashmir 3.0 10.900000 6.642146 4.70 7.3950 10.090 14.0000 17.91
Jharkhand 3.0 11.500000 3.610374 7.92 9.6800 11.440 13.2900 15.14
Karnataka 3.0 14.120000 3.624969 11.42 12.0600 12.700 15.4700 18.24
Kerala 3.0 12.583333 0.654930 11.85 12.3200 12.790 12.9500 13.11
Madhya Pradesh 3.0 12.626667 2.408492 10.11 11.4850 12.860 13.8850 14.91
Maharashtra 2.0 11.260000 3.507250 8.78 10.0200 11.260 12.5000 13.74
Manipur 2.0 14.610000 4.553768 11.39 13.0000 14.610 16.2200 17.83
Meghalaya 3.0 6.953333 2.401548 4.87 5.6400 6.410 7.9950 9.58
Mizoram 2.0 17.700000 7.636753 12.30 15.0000 17.700 20.4000 23.10
Nagaland 2.0 16.415000 7.870098 10.85 13.6325 16.415 19.1975 21.98
Odisha 3.0 9.836667 3.411412 6.19 8.2800 10.370 11.6600 12.95
Punjab 2.0 11.185000 1.746554 9.95 10.5675 11.185 11.8025 12.42
Rajasthan 2.0 11.320000 0.070711 11.27 11.2950 11.320 11.3450 11.37
Sikkim 3.0 10.486667 1.622108 9.39 9.5550 9.720 11.0350 12.35
Tamil Nadu 3.0 12.336667 1.268910 10.99 11.7500 12.510 13.0100 13.51
Telangana 3.0 12.763333 0.248462 12.61 12.6200 12.630 12.8400 13.05
Tripura 2.0 17.030000 1.569777 15.92 16.4750 17.030 17.5850 18.14
Uttar Pradesh 3.0 11.940000 2.416464 10.51 10.5450 10.580 12.6550 14.73
Uttarakhand 3.0 11.803333 3.189864 8.12 10.8800 13.640 13.6450 13.65
Andaman & Nicobar Islands 2.0 13.785000 4.094148 10.89 12.3375 13.785 15.2325 16.68
Chandigarh 3.0 11.960000 2.952440 8.84 10.5850 12.330 13.5200 14.71
Delhi 3.0 12.160000 1.236487 10.96 11.5250 12.090 12.7600 13.43
Puducherry 3.0 12.053333 3.305334 10.14 10.1450 10.150 13.0100 15.87
All_India GDP 3.0 11.203333 1.565162 9.99 10.3200 10.650 11.8100 12.97
In [38]:
# states having mean growth rate greater than 13 and standard deviation greater than 2

describe[(describe['mean']<12) & (describe['std']>2)]
Out[38]:
count mean std min 25% 50% 75% max
Goa 3.0 6.033333 10.290444 -5.77 2.490 10.75 11.935 13.12
Haryana 3.0 11.846667 3.238245 9.18 10.045 10.91 13.180 15.45
Jammu & Kashmir 3.0 10.900000 6.642146 4.70 7.395 10.09 14.000 17.91
Jharkhand 3.0 11.500000 3.610374 7.92 9.680 11.44 13.290 15.14
Maharashtra 2.0 11.260000 3.507250 8.78 10.020 11.26 12.500 13.74
Meghalaya 3.0 6.953333 2.401548 4.87 5.640 6.41 7.995 9.58
Odisha 3.0 9.836667 3.411412 6.19 8.280 10.37 11.660 12.95
Uttar Pradesh 3.0 11.940000 2.416464 10.51 10.545 10.58 12.655 14.73
Uttarakhand 3.0 11.803333 3.189864 8.12 10.880 13.64 13.645 13.65
Chandigarh 3.0 11.960000 2.952440 8.84 10.585 12.33 13.520 14.71
In [39]:
df1.head()
Out[39]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Tamil Nadu Telangana Tripura Uttar Pradesh Uttarakhand Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.0 11063.0 143175.0 247144.0 158074.0 42367.0 615606.0 297539.0 ... 751485.0 359433.0 19208.0 724049.0 115523.0 3979.0 18768.0 343767.0 16818.0 8736039.0
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.0 12547.0 156864.0 282368.0 177511.0 38120.0 724495.0 347032.0 ... 855481.0 401493.0 21663.0 822903.0 131835.0 4421.0 21609.0 391238.0 18875.0 9946636.0
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.0 14602.0 177745.0 317101.0 206690.0 35921.0 807623.0 400662.0 ... 971090.0 452186.0 25593.0 944146.0 149817.0 5159.0 24787.0 443783.0 21870.0 11236635.0
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.0 16761.0 198098.0 373920.0 234982.0 40633.0 895027.0 437462.0 ... 1092564.0 511178.0 29667.0 1043371.0 161985.0 5721.0 27844.0 492424.0 24089.0 12433749.0
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.0 18784.0 224234.0 413503.0 260776.0 45002.0 994316.0 485184.0 ... 1212668.0 575631.0 NaN 1153795.0 184091.0 NaN 30304.0 551963.0 26533.0 13675331.0

5 rows × 35 columns

In [40]:
# filtering out the data for the year 2015-16 and storing it in a dataframe
total_GDP_15_16 = df1[(df1['Items  Description'] == 'GSDP - CURRENT PRICES (` in Crore)') & (df1['Duration'] == '2015-16')]
total_GDP_15_16
Out[40]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Tamil Nadu Telangana Tripura Uttar Pradesh Uttarakhand Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.0 18784.0 224234.0 413503.0 260776.0 45002.0 994316.0 485184.0 ... 1212668.0 575631.0 NaN 1153795.0 184091.0 NaN 30304.0 551963.0 26533.0 13675331.0

1 rows × 35 columns

In [41]:
# carrying out necessary transformation to make the data ready for plotting

total_GDP_15_16_states = total_GDP_15_16[total_GDP_15_16.columns[2:34]].transpose()
total_GDP_15_16_states = total_GDP_15_16_states.rename(columns={4: 'Total GDP of States 2015-16'})
total_GDP_15_16_states = total_GDP_15_16_states.dropna()
total_GDP_15_16_states = total_GDP_15_16_states.sort_values('Total GDP of States 2015-16',ascending=True)
total_GDP_15_16_states
Out[41]:
Total GDP of States 2015-16
Sikkim 16637.0
Arunachal Pradesh 18784.0
Puducherry 26533.0
Meghalaya 26745.0
Chandigarh 30304.0
Goa 45002.0
Jammu & Kashmir 118387.0
Uttarakhand 184091.0
Assam 224234.0
Jharkhand 241955.0
Chhattisgarh 260776.0
Odisha 341887.0
Bihar 413503.0
Haryana 485184.0
Madhya Pradesh 543975.0
Delhi 551963.0
Telangana 575631.0
Kerala 588337.0
Andhra Pradesh 609934.0
Gujarat 994316.0
Karnataka 1027068.0
Uttar Pradesh 1153795.0
Tamil Nadu 1212668.0
In [42]:
plt.figure(figsize=(10,8), dpi = 600)

sns.barplot(x = total_GDP_15_16_states['Total GDP of States 2015-16'], y = total_GDP_15_16_states.index,palette='plasma')
plt.xlabel('Total GDP of States for 2015-16', fontsize=12)
plt.ylabel('States', fontsize=12)
plt.title('Total GDP of States 2015-16 for all the states',fontsize=12)
plt.show()
In [43]:
top_5_eco = total_GDP_15_16_states[-5:]
top_5_eco
Out[43]:
Total GDP of States 2015-16
Andhra Pradesh 609934.0
Gujarat 994316.0
Karnataka 1027068.0
Uttar Pradesh 1153795.0
Tamil Nadu 1212668.0
In [44]:
bottom_5_eco = total_GDP_15_16_states[:5]
bottom_5_eco
Out[44]:
Total GDP of States 2015-16
Sikkim 16637.0
Arunachal Pradesh 18784.0
Puducherry 26533.0
Meghalaya 26745.0
Chandigarh 30304.0

Part I- B¶

1.Taking the data(states) (Except for Delhi, Chandigarh, Andaman and Nicobar Islands, etc.) for further analysis, as they are governed directly by the centre, not state governments.¶

In [50]:
pip install pandas openpyxl
Requirement already satisfied: pandas in c:\users\deepi\anaconda3\lib\site-packages (2.0.3)
Requirement already satisfied: openpyxl in c:\users\deepi\anaconda3\lib\site-packages (3.0.10)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\deepi\anaconda3\lib\site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\deepi\anaconda3\lib\site-packages (from pandas) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.1 in c:\users\deepi\anaconda3\lib\site-packages (from pandas) (2023.3)
Requirement already satisfied: numpy>=1.21.0 in c:\users\deepi\anaconda3\lib\site-packages (from pandas) (1.24.3)
Requirement already satisfied: et_xmlfile in c:\users\deepi\anaconda3\lib\site-packages (from openpyxl) (1.1.0)
Requirement already satisfied: six>=1.5 in c:\users\deepi\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [51]:
AP = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Andhra_Pradesh-GSVA_cur_2016-17.csv')
Arunachal = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Arunachal_Pradesh-GSVA_cur_2015-16.csv')
AN = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\All States\NAD-Andaman_Nicobar_Islands-GSVA_cur_2014-15.csv')
Assam = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Assam-GSVA_cur_2015-16.csv')
Bihar = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Bihar-GSVA_cur_2015-16.csv')
Chhattisgarh = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Chhattisgarh-GSVA_cur_2016-17.csv')
Delhi = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Delhi-GSVA_cur_2016-17.csv')
Goa = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Goa-GSVA_cur_2015-16.csv')
GJ = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Gujarat-GSVA_cur_2015-16.csv')
HR = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Haryana-GSVA_cur_2016-17.csv')
HP = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Himachal_Pradesh-GSVA_cur_2014-15.csv')
JK = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Jammu_Kashmir-GSVA_cur_2015-16.csv')
Jharkhand = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Jharkhand-GSVA_cur_2015-16.csv')
KA = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Karnataka-GSVA_cur_2015-16.csv')
KL = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Kerala-GSVA_cur_2015-16.csv')
MP = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Madhya_Pradesh-GSVA_cur_2016-17.csv')
MH = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Maharashtra-GSVA_cur_2014-15.csv')
Meghalaya = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Meghalaya-GSVA_cur_2016-17.csv')
Mizoram = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Mizoram-GSVA_cur_2014-15.csv')
NL = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Nagaland-GSVA_cur_2014-15.csv')
Odisha = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Odisha-GSVA_cur_2016-17.csv')
Puducherry = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Puducherry-GSVA_cur_2016-17.csv')
Punjab = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Punjab-GSVA_cur_2014-15.csv')
RJ = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Rajasthan-GSVA_cur_2014-15.csv')
Sikkim = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Sikkim-GSVA_cur_2015-16.csv')
TN = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Tamil_Nadu-GSVA_cur_2016-17.csv')
Telangana = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Telangana-GSVA_cur_2016-17.csv')
Tripura = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Tripura-GSVA_cur_2014-15.csv')
UP = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Uttar_Pradesh-GSVA_cur_2015-16.csv')
UK = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Uttarakhand-GSVA_cur_2015-16.csv')
Manipur = pd.read_excel('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\Manipur1.xlsx')
  Cell In[51], line 1
    AP = pd.read_csv('C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Andhra_Pradesh-GSVA_cur_2016-17.csv')
                                                                                                              ^
SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 47-48: malformed \N character escape
In [62]:
import pandas as pd

AP = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Andhra_Pradesh-GSVA_cur_2016-17.csv')
Arunachal = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Arunachal_Pradesh-GSVA_cur_2015-16.csv')
AN = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Andaman_Nicobar_Islands-GSVA_cur_2014-15.csv')
Assam = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Assam-GSVA_cur_2015-16.csv')
Bihar = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Bihar-GSVA_cur_2015-16.csv')
Chhattisgarh = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Chhattisgarh-GSVA_cur_2016-17.csv')
Delhi = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Delhi-GSVA_cur_2016-17.csv')
Goa = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Goa-GSVA_cur_2015-16.csv')
GJ = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Gujarat-GSVA_cur_2015-16.csv')
HR = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Haryana-GSVA_cur_2016-17.csv')
HP = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Himachal_Pradesh-GSVA_cur_2014-15.csv')
JK = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Jammu_Kashmir-GSVA_cur_2015-16.csv')
Jharkhand = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Jharkhand-GSVA_cur_2015-16.csv')
KA = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Karnataka-GSVA_cur_2015-16.csv')
KL = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Kerala-GSVA_cur_2015-16.csv')
MP = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Madhya_Pradesh-GSVA_cur_2016-17.csv')
MH = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Maharashtra-GSVA_cur_2014-15.csv')
Meghalaya = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Meghalaya-GSVA_cur_2016-17.csv')
Mizoram = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Mizoram-GSVA_cur_2014-15.csv')
NL = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Nagaland-GSVA_cur_2014-15.csv')
Odisha = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Odisha-GSVA_cur_2016-17.csv')
Puducherry = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Puducherry-GSVA_cur_2016-17.csv')
Punjab = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Punjab-GSVA_cur_2014-15.csv')
RJ = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Rajasthan-GSVA_cur_2014-15.csv')
Sikkim = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Sikkim-GSVA_cur_2015-16.csv')
TN = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Tamil_Nadu-GSVA_cur_2016-17.csv')
Telangana = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Telangana-GSVA_cur_2016-17.csv')
Tripura = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Tripura-GSVA_cur_2014-15.csv')
UP = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Uttar_Pradesh-GSVA_cur_2015-16.csv')
UK = pd.read_csv(r'C:\MBA\3rd SEM\applicationofsoftware\GDP\States\NAD-Uttarakhand-GSVA_cur_2015-16.csv')

Taking data for 2014-15¶

In [63]:
A_P = Arunachal[['S.No.','Item', '2014-15']]
A_P = A_P.rename(columns={'2014-15': 'Arunachal'})

assam = Assam[['S.No.','Item', '2014-15']]
assam = assam.rename(columns={'2014-15': 'Assam'})

bihar = Bihar[['S.No.','Item', '2014-15']]
bihar = bihar.rename(columns={'2014-15': 'Bihar'})

chhattisgarh = Chhattisgarh[['S.No.','Item', '2014-15']]
chhattisgarh = chhattisgarh.rename(columns={'2014-15': 'Chhattisgarh'})

goa = Goa[['S.No.','Item', '2014-15']]
goa = goa.rename(columns={'2014-15': 'Goa'})

gujarat = GJ[['S.No.','Item', '2014-15']]
gujarat = gujarat.rename(columns={'2014-15': 'GJ'})

haryana = HR[['S.No.','Item', '2014-15']]
haryana = haryana.rename(columns={'2014-15': 'HR'})

himachal_Pradesh = HP[['S.No.','Item', '2014-15']]
himachal_Pradesh = himachal_Pradesh.rename(columns={'2014-15': 'HP'})

jharkhand = Jharkhand[['S.No.','Item', '2014-15']]
jharkhand = jharkhand.rename(columns={'2014-15': 'Jharkhand'})

karnataka = KA[['S.No.','Item', '2014-15']]
karnataka = karnataka.rename(columns={'2014-15': 'KA'})

kerala = KL[['S.No.','Item', '2014-15']]
kerala = kerala.rename(columns={'2014-15': 'KL'})

madhya_pradesh = MP[['S.No.','Item', '2014-15']]
madhya_pradesh = madhya_pradesh.rename(columns={'2014-15': 'MP'})

maharashtra = MH[['S.No.','Item', '2014-15']]
maharashtra = maharashtra.rename(columns={'2014-15': 'MH'})

#manipur = Manipur[['S.No.','Item', '2014-15']]
#manipur = manipur.rename(columns={'2014-15': 'Manipur'})

meghalaya = Meghalaya[['S.No.','Item', '2014-15']]
meghalaya = meghalaya.rename(columns={'2014-15': 'Meghalaya'})

mizoram = Mizoram[['S.No.','Item', '2014-15']]
mizoram = mizoram.rename(columns={'2014-15': 'Mizoram'})

nagaland = NL[['S.No.','Item', '2014-15']]
nagaland = nagaland.rename(columns={'2014-15': 'NL'})

odisha = Odisha[['S.No.','Item', '2014-15']]
odisha = odisha.rename(columns={'2014-15': 'Odisha'})

punjab = Punjab[['S.No.','Item', '2014-15']]
punjab = punjab.rename(columns={'2014-15': 'Punjab'})

rajasthan = RJ[['S.No.','Item', '2014-15']]
rajasthan = rajasthan.rename(columns={'2014-15': 'RJ'})

sikkim = Sikkim[['S.No.','Item', '2014-15']]
sikkim = sikkim.rename(columns={'2014-15': 'Sikkim'})

tamil_nadu = TN[['S.No.','Item', '2014-15']]
tamil_nadu = tamil_nadu.rename(columns={'2014-15': 'TN'})

telangana = Telangana[['S.No.','Item', '2014-15']]
telangana = telangana.rename(columns={'2014-15': 'Telangana'})

tripura = Tripura[['S.No.','Item', '2014-15']]
tripura = tripura.rename(columns={'2014-15': 'Tripura'})

uttar_pradesh = UP[['S.No.','Item', '2014-15']]
uttar_pradesh = uttar_pradesh.rename(columns={'2014-15': 'UP'})

uttarakhand = UK[['S.No.','Item', '2014-15']]
uttarakhand = uttarakhand.rename(columns={'2014-15': 'UK'})
In [73]:
andhra_pradesh = AP[['S.No.','Item', '2014-15']]
andhra_pradesh = andhra_pradesh.rename(columns={'2014-15': 'AP'})

Merging all the tables for different states into a single dataframe¶

In [74]:
uttarakhand
Out[74]:
S.No. Item UK
0 1 Agriculture, forestry and fishing 1601423.0
1 1.1 Crops 866146.0
2 1.2 Livestock 391188.0
3 1.3 Forestry and logging 339293.0
4 1.4 Fishing and aquaculture 4796.0
5 2 Mining and quarrying 244549.0
6 Total Primary 1845972.0
7 3 Manufacturing 5866252.0
8 4 Electricity, gas, water supply & other utility... 433880.0
9 5 Construction 1342733.0
10 Total Secondary 7642865.0
11 6 Trade, repair, hotels and restaurants 1743106.0
12 6.1 Trade & repair services 1534073.0
13 6.2 Hotels & restaurants 209033.0
14 7 Transport, storage, communication & services r... 1066693.0
15 7.1 Railways 21295.0
16 7.2 Road transport* 307147.0
17 7.3 Water transport NaN
18 7.4 Air transport 3889.0
19 7.5 Services incidental to transport -76.0
20 7.6 Storage 660.0
21 7.7 Communication & services related to broadcasting 733778.0
22 8 Financial services 385030.0
23 9 Real estate, ownership of dwelling & professio... 831307.0
24 10 Public administration 579409.0
25 11 Other services 982430.0
26 Total Tertiary 5587975.0
27 12 TOTAL GSVA at basic prices 15076812.0
28 13 Taxes on Products 1434856.0
29 14 Subsidies on products 313139.0
30 15 Gross State Domestic Product 16198529.0
31 16 Population ('00) 105820.0
32 17 Per Capita GSDP (Rs.) 153076.0
In [76]:
s = [andhra_pradesh,A_P, assam, bihar, chhattisgarh, goa, gujarat, haryana,himachal_Pradesh,
       jharkhand, karnataka,kerala,madhya_pradesh, maharashtra,meghalaya,mizoram, nagaland,odisha,
       punjab,rajasthan,sikkim,tamil_nadu,telangana,tripura,uttarakhand, uttar_pradesh]


from functools import reduce
df_final = reduce(lambda left,right: pd.merge(left,right,how ='left',on=['S.No.', 'Item']), dfs)
In [77]:
df_final.columns
Out[77]:
Index(['S.No.', 'Item', 'AP', 'Arunachal', 'Assam', 'Bihar', 'Chhattisgarh',
       'Goa', 'GJ', 'HR', 'HP', 'Jharkhand', 'KA', 'KL', 'MP', 'MH',
       'Meghalaya', 'Mizoram', 'NL', 'Odisha', 'Punjab', 'RJ', 'Sikkim', 'TN',
       'Telangana', 'Tripura', 'UK', 'UP'],
      dtype='object')

Renaming some of the state names for merging¶

In [78]:
df_final = df_final.rename(columns={'AP':'Andhra Pradesh', 'Arunachal':'Arunachal Pradesh','GJ':'Gujarat','HR':'Haryana',
                                   'HP':'Himachal Pradesh','KA':'Karnataka','KL':'Kerala','MP':'Madhya Pradesh','MH':'Maharashtra','NL':'Nagaland','RJ':'Rajasthan',
                                   'TN':'Tamil Nadu','UP':'Uttar Pradesh',
                                   'Chhattisgarh':'Chhatisgarh','UK':'Uttrakhand'})
In [79]:
df_final
Out[79]:
S.No. Item Andhra Pradesh Arunachal Pradesh Assam Bihar Chhatisgarh Goa Gujarat Haryana ... Nagaland Odisha Punjab Rajasthan Sikkim Tamil Nadu Telangana Tripura Uttrakhand Uttar Pradesh
0 1 Agriculture, forestry and fishing 14819416 686117 3855548 7951890 3948847 308507 13769969.00 8015238.0 ... 607897 6422978 9285716 15044394 137447 13064238.0 7591501 799825.0 1601423.0 25097754
1 1.1 Crops 7893514 415520 2890544 4688237 2613371 140421 9671086.00 4636731.0 ... 375825 4382636 5690972 7673441 114976 7297820.0 4162493 397591.0 866146.0 16215212
2 1.2 Livestock 4309078 38387 173478 2060296 352208 30141 2698910.00 2916173.0 ... 123800 788243 2638842 5356257 17338 4693361.0 2951299 88176.0 391188.0 7096876
3 1.3 Forestry and logging 346160 224017 261987 550132 597785 15744 761616.00 352254.0 ... 99802 791463 848245 1956660 4529 392705.0 210741 145096.0 339293.0 1404936
4 1.4 Fishing and aquaculture 2270664 8193 529539 653224 385483 122201 638357.00 110080.0 ... 8470 460636 107657 58036 604 680352.0 266968 168961.0 4796.0 380730
5 2 Mining and quarrying 1484300 30842 1471149 68107 2451970 3622 2117218.00 25186.0 ... 8280 2586328 10354 4069385 1329 265536.0 1541853 142391.0 244549.0 901501
6 Total Primary 16303716 716959 5326697 8019997 6400817 312129 15887187.00 8040424.0 ... 616178 9009306 9296070 19113780 138776 13329774.0 9133354 942216.0 1845972.0 25999255
7 3 Manufacturing 4672266 26120 2002936 2189965 4370593 1177608 24087538.00 7756921.0 ... 18346 5754229 4790341 6552580 550697 18914794.0 6353711 228625.0 5866252.0 12261649
8 4 Electricity, gas, water supply & other utility... 1151729 113527 296587 345168 1198438 204110 3409983.00 1101919.0 ... 37944 833067 911611 1122888 212499 1710379.0 716266 77870.0 433880.0 2030625
9 5 Construction 4664889 147842 1733568 3449763 2669855 165819 5526017.00 3702571.0 ... 156072 2402396 2202962 5353326 82058 12216718.0 2854024 177899.0 1342733.0 11256450
10 Total Secondary 10488884 287489 4033091 5984896 8238886 1547536 33023538.00 12561411.0 ... 212361 8989693 7904914 13028794 845253 32841892.0 9924001 484393.0 7642865.0 25548724
11 6 Trade, repair, hotels and restaurants 4233400 60421 2987155 7448373 1535571 380927 10178713.00 4986319.0 ... 140781 3149555 4419919 7297290 70568 12895842.0 6494607 390423.0 1743106.0 9437243
12 6.1 Trade & repair services 3716000 56796 2876251 7081391 1414164 343492 10178713.00 4817784.0 ... 134174 2886789 4201252 6942748 64624 11252588.0 5724128 390423.0 1534073.0 8476139
13 6.2 Hotels & restaurants 517400 3625 110904 366982 121407 37434 NaN 168535.0 ... 6607 262766 218667 354543 5945 1643253.0 770479 NaN 209033.0 961104
14 7 Transport, storage, communication & services r... 5076984 35203 1194568 3147173 871770 189656 4555910.00 2560623.0 ... 77521 2034016 1951809 3814461 47347 7188320.0 3604741 155956.0 1066693.0 7404509
15 7.1 Railways 424228 59 252509 462413 159176 15649 511593.00 423873.0 ... 336 341494 233389 464638 0 468553.0 199686 305.0 21295.0 1618742
16 7.2 Road transport 2816000 15467 507668 1572288 386628 46171 NaN 1452364.0 ... 34548 973144 928575 2121206 35283 3660994.0 2055658 NaN NaN 3645747
17 7.3 Water transport 94200 0 4502 2228 0 17820 NaN NaN ... 600 50349 0 0 0 70414.0 0 NaN NaN 681
18 7.4 Air transport 14900 0 26223 13599 9507 46359 NaN NaN ... 4153 15354 4473 13469 0 180836.0 120691 NaN 3889.0 36582
19 7.5 Services incidental to transport 780200 109 35739 166600 5232 19272 NaN 190269.0 ... 0 117469 48124 47609 0 NaN 454909 NaN -76.0 16323
20 7.6 Storage 18700 0 10308 10618 16675 357 57634.00 14459.0 ... 89 22675 76429 16584 0 39834.0 19805 254.0 660.0 171696
21 7.7 Communication & services related to broadcasting 928756 19568 357619 919427 294552 44028 1242520.00 479658.0 ... 37794 513531 660819 1150955 12064 1903283.0 753992 66676.0 733778.0 1914737
22 8 Financial services 1900863 25207 543651 1178022 739057 233618 4606644.00 1671486.0 ... 60393 1065147 2057520 1827413 21079 5598498.0 3023729 86094.0 385030.0 3392275
23 9 Real estate, ownership of dwelling & professio... 4405409 48418 1412466 3740641 2462166 407099 5179502.00 6970183.0 ... 159651 2348714 3142786 6451997 75330 16830213.0 9478839 190704.0 831307.0 14548185
24 10 Public administration 2200897 243867 1373611 2078171 867982 346486 2576195.00 1036377.0 ... 295424 1318221 1842730 2460364 119514 3400800.0 1711265 338244.0 579409.0 6152124
25 11 Other services 4215389 218728 1795658 4587589 1112232 180431 3123413.00 2001581.0 ... 259186 2340603 3303041 4164287 149265 7430115.0 4158229 323287.0 982430.0 5034623
26 Total Tertiary 22032942 631844 9307109 22179969 7588778 1738217 30220377.00 19226568.0 ... 992956 12256258 16717805 26015812 483103 53343788.0 28471410 1484709.0 5587975.0 45968959
27 12 TOTAL GSVA at basic prices 48825542 1636292 18666897 36184863 22228481 3597882 79131102.00 39828404.0 ... 1821495 30255256 33918789 58158386 1467133 99515453.0 47528765 2911319.0 15076812.0 97516938
28 13 Taxes on Products 5512100 70099 1725309 3213546 2601791 527279 12353171.04 4985670.0 ... 57674 3151184 3794100 5394503 72200 12507325.0 4425700 149345.0 1434856.0 10107396
29 14 Subsidies on products 1690800 30272 582406 2006421 1332092 61854 1981546.00 1067867.0 ... 37745 1209349 911800 2333442 18400 2766405.0 836700 94002.0 313139.0 3287219
30 15 Gross State Domestic Product 52646842 1676119 19809800 37391988 23498180 4063307 89502727.00 43746207.0 ... 1841424 32197092 36801089 61219447 1520933 109256373.0 51117765 2966662.0 16198529.0 104337115
31 16 Population ('00) 501510 14870 326780 1101240 270530 14950 633590.00 266620.0 ... 20550 435220 290673 721610 6330 745760.0 367660 38350.0 105820.0 2109940
32 17 Per Capita GSDP (Rs.) 104977 112718 60621 33954 86860 271793 141263.00 164077.0 ... 89607 73979 126606 84837 240274 146503.0 139035 77358.0 153076.0 49450

33 rows × 28 columns

Creating the GDP per capita Data Frame¶

In [80]:
AP
Out[80]:
S.No. Item 2011-12 2012-13 2013-14 2014-15 2015-16 2016-17
0 1 Agriculture, forestry and fishing 9400805 11186428 12895568 14819416 17326726 20386004
1 1.1 Crops 5204052 6123041 7114707 7893514 8644285 9717089
2 1.2 Livestock 2758776 3358438 3643026 4309078 5155487 5979648
3 1.3 Forestry and logging 250314 253029 280493 346160 340550 335487
4 1.4 Fishing and aquaculture 1187663 1451920 1857342 2270664 3186404 4353780
5 2 Mining and quarrying 1416194 1459027 1419200 1484300 1633100 1757565
6 Total Primary 10816999 12645455 14314768 16303716 18959826 22143569
7 3 Manufacturing 5070622 4237657 4242337 4672266 5078685 5740816
8 4 Electricity, gas, water supply & other utility... 1076517 713029 1014130 1151729 1251910 1298947
9 5 Construction 3702645 3759004 4065131 4664889 4986189 5467732
10 Total Secondary 9849784 8709690 9321598 10488884 11316784 12507496
11 6 Trade, repair, hotels and restaurants 2673600 3179200 3784900 4233400 4577700 5174600
12 6.1 Trade & repair services 2322700 2775600 3290800 3716000 3964000 4382200
13 6.2 Hotels & restaurants 350900 403600 494100 517400 613700 792400
14 7 Transport, storage, communication & services r... 3251379 3778632 4365811 5076984 5806226 6766317
15 7.1 Railways 264279 303702 334468 424228 436596 445372
16 7.2 Road transport 1824800 2200400 2516200 2816000 3217400 3798300
17 7.3 Water transport 95100 90400 60600 94200 110500 123600
18 7.4 Air transport 4500 10600 5100 14900 25000 28000
19 7.5 Services incidental to transport 501800 535500 634700 780200 904200 1020800
20 7.6 Storage 20700 16600 18700 18700 19000 20600
21 7.7 Communication & services related to broadcasting 540200 621430 796043 928756 1093530 1329645
22 8 Financial services 1425608 1584297 1710463 1900863 2125508 2415390
23 9 Real estate, ownership of dwelling & professio... 2833805 3434098 3897345 4405409 5092310 5942431
24 10 Public administration 1393752 1552379 1738971 2200897 2594904 3039676
25 11 Other services 2730376 3063920 3538298 4215389 5248604 6180240
26 Total Tertiary 14308520 16592526 19035788 22032942 25445252 29518654
27 12 TOTAL GSVA at basic prices 34975303 37947671 42672154 48825542 55721862 64169719
28 13 Taxes on Products 4243900 4656500 5263500 5512100 6719300 7236500
29 14 Subsidies on products 1279000 1463800 1508500 1690800 1447800 1475500
30 15 Gross State Domestic Product 37940203 41140371 46427154 52646842 60993362 69930719
31 16 Population ('00) 492750 495660 498570 501510 504460 507430
32 17 Per Capita GSDP (Rs.) 76997 83001 93121 104977 120908 137814
In [81]:
gdp_per_capita = df_final.iloc[32][2:].sort_values()
gdp_per_capita = gdp_per_capita.to_frame(name = 'GDP per capita')
gdp_per_capita
Out[81]:
GDP per capita
Bihar 33954
Uttar Pradesh 49450
Assam 60621
Jharkhand 62091
Madhya Pradesh 62989
Odisha 73979
Meghalaya 76228.0
Tripura 77358.0
Rajasthan 84837
Chhatisgarh 86860
Nagaland 89607
Mizoram 97687
Andhra Pradesh 104977
Arunachal Pradesh 112718
Punjab 126606
Telangana 139035
Gujarat 141263.0
Karnataka 145141
Tamil Nadu 146503.0
Himachal Pradesh 147330
Maharashtra 152853
Uttrakhand 153076.0
Kerala 154778.0
Haryana 164077.0
Sikkim 240274
Goa 271793

2.Plotting GDP per capita¶

In [82]:
plt.figure(figsize=(12,8), dpi=600)                             

sns.barplot(x = gdp_per_capita['GDP per capita'], y =gdp_per_capita.index, palette='Blues' )
plt.xlabel('GDP per capita', fontsize=12)
plt.ylabel('States', fontsize=12)
plt.title('GDP per capita vs States',fontsize=12)
plt.show()

2A.Top 5 states based on GDP per capita¶

In [83]:
top_5 = gdp_per_capita[-5:]
top_5
Out[83]:
GDP per capita
Uttrakhand 153076.0
Kerala 154778.0
Haryana 164077.0
Sikkim 240274
Goa 271793

2A.Bottom 5 states based on GDP per capita¶

In [84]:
bottom_5 = gdp_per_capita[:5]
bottom_5
Out[84]:
GDP per capita
Bihar 33954
Uttar Pradesh 49450
Assam 60621
Jharkhand 62091
Madhya Pradesh 62989

2B.Find the ratio of the highest per capita GDP to the lowest per capita GDP.¶

In [85]:
ratio = gdp_per_capita['GDP per capita'].max()/gdp_per_capita['GDP per capita'].min()

print('The Ratio of highest per capita GDP to the lowest per capita GDP is: ',ratio)
The Ratio of highest per capita GDP to the lowest per capita GDP is:  8.004741709371503

3.Plot the percentage contribution of the primary, secondary and tertiary sectors as a percentage of the total GDP for all the states.¶

In [86]:
primary = df_final[df_final['Item']=='Primary']
secondary = df_final[df_final['Item']=='Secondary']
tertiary = df_final[df_final['Item']=='Tertiary']
gdp = df_final[df_final['Item']=='Gross State Domestic Product']

merged = pd.concat([primary, secondary,tertiary,gdp], axis = 0).reset_index()
merged =  merged.drop(['index','S.No.'], axis = 1).set_index('Item')
In [87]:
merged
Out[87]:
Andhra Pradesh Arunachal Pradesh Assam Bihar Chhatisgarh Goa Gujarat Haryana Himachal Pradesh Jharkhand ... Nagaland Odisha Punjab Rajasthan Sikkim Tamil Nadu Telangana Tripura Uttrakhand Uttar Pradesh
Item
Primary 16303716 716959 5326697 8019997 6400817 312129 15887187.0 8040424.0 1548366 5248354 ... 616178 9009306 9296070 19113780 138776 13329774.0 9133354 942216.0 1845972.0 25999255
Secondary 10488884 287489 4033091 5984896 8238886 1547536 33023538.0 12561411.0 4119162 6241471 ... 212361 8989693 7904914 13028794 845253 32841892.0 9924001 484393.0 7642865.0 25548724
Tertiary 22032942 631844 9307109 22179969 7588778 1738217 30220377.0 19226568.0 4133326 8133341 ... 992956 12256258 16717805 26015812 483103 53343788.0 28471410 1484709.0 5587975.0 45968959
Gross State Domestic Product 52646842 1676119 19809800 37391988 23498180 4063307 89502727.0 43746207.0 10436879 21710718 ... 1841424 32197092 36801089 61219447 1520933 109256373.0 51117765 2966662.0 16198529.0 104337115

4 rows × 26 columns

calculating the percentage contribution of each sector to the Gross State Domestic Product for each state¶

In [88]:
merged.loc['primary_percentage'] = merged.loc['Primary'] / merged.loc['Gross State Domestic Product'] * 100
merged.loc['secondary_percentage'] = merged.loc['Secondary'] / merged.loc['Gross State Domestic Product'] * 100
merged.loc['tertiary_percentage'] = merged.loc['Tertiary'] / merged.loc['Gross State Domestic Product'] * 100
merged
Out[88]:
Andhra Pradesh Arunachal Pradesh Assam Bihar Chhatisgarh Goa Gujarat Haryana Himachal Pradesh Jharkhand ... Nagaland Odisha Punjab Rajasthan Sikkim Tamil Nadu Telangana Tripura Uttrakhand Uttar Pradesh
Item
Primary 1.630372e+07 7.169590e+05 5.326697e+06 8.019997e+06 6.400817e+06 3.121290e+05 1.588719e+07 8.040424e+06 1.548366e+06 5.248354e+06 ... 6.161780e+05 9.009306e+06 9.296070e+06 1.911378e+07 1.387760e+05 1.332977e+07 9.133354e+06 9.422160e+05 1.845972e+06 2.599926e+07
Secondary 1.048888e+07 2.874890e+05 4.033091e+06 5.984896e+06 8.238886e+06 1.547536e+06 3.302354e+07 1.256141e+07 4.119162e+06 6.241471e+06 ... 2.123610e+05 8.989693e+06 7.904914e+06 1.302879e+07 8.452530e+05 3.284189e+07 9.924001e+06 4.843930e+05 7.642865e+06 2.554872e+07
Tertiary 2.203294e+07 6.318440e+05 9.307109e+06 2.217997e+07 7.588778e+06 1.738217e+06 3.022038e+07 1.922657e+07 4.133326e+06 8.133341e+06 ... 9.929560e+05 1.225626e+07 1.671780e+07 2.601581e+07 4.831030e+05 5.334379e+07 2.847141e+07 1.484709e+06 5.587975e+06 4.596896e+07
Gross State Domestic Product 5.264684e+07 1.676119e+06 1.980980e+07 3.739199e+07 2.349818e+07 4.063307e+06 8.950273e+07 4.374621e+07 1.043688e+07 2.171072e+07 ... 1.841424e+06 3.219709e+07 3.680109e+07 6.121945e+07 1.520933e+06 1.092564e+08 5.111776e+07 2.966662e+06 1.619853e+07 1.043371e+08
primary_percentage 3.096808e+01 4.277495e+01 2.688920e+01 2.144844e+01 2.723963e+01 7.681649e+00 1.775051e+01 1.837971e+01 1.483553e+01 2.417402e+01 ... 3.346204e+01 2.798174e+01 2.526031e+01 3.122175e+01 9.124399e+00 1.220045e+01 1.786728e+01 3.176014e+01 1.139592e+01 2.491851e+01
secondary_percentage 1.992310e+01 1.715206e+01 2.035907e+01 1.600582e+01 3.506180e+01 3.808563e+01 3.689668e+01 2.871429e+01 3.946737e+01 2.874834e+01 ... 1.153243e+01 2.792082e+01 2.148011e+01 2.128212e+01 5.557464e+01 3.005947e+01 1.941400e+01 1.632788e+01 4.718246e+01 2.448671e+01
tertiary_percentage 4.185045e+01 3.769685e+01 4.698235e+01 5.931744e+01 3.229517e+01 4.277838e+01 3.376476e+01 4.395025e+01 3.960308e+01 3.746233e+01 ... 5.392327e+01 3.806635e+01 4.542747e+01 4.249599e+01 3.176360e+01 4.882442e+01 5.569768e+01 5.004645e+01 3.449681e+01 4.405811e+01

7 rows × 26 columns

In [89]:
# Transposing the dataframe for better understanding

merged = merged.T
merged = merged.sort_values('Gross State Domestic Product')
merged
Out[89]:
Item Primary Secondary Tertiary Gross State Domestic Product primary_percentage secondary_percentage tertiary_percentage
Mizoram 225598.0 270072.0 637619.0 1155933.0 19.516529 23.363984 55.160550
Sikkim 138776.0 845253.0 483103.0 1520933.0 9.124399 55.574637 31.763595
Arunachal Pradesh 716959.0 287489.0 631844.0 1676119.0 42.774946 17.152064 37.696846
Nagaland 616178.0 212361.0 992956.0 1841424.0 33.462038 11.532434 53.923268
Meghalaya 451050.0 637942.0 1200655.0 2440807.0 18.479544 26.136520 49.190903
Tripura 942216.0 484393.0 1484709.0 2966662.0 31.760140 16.327880 50.046450
Goa 312129.0 1547536.0 1738217.0 4063307.0 7.681649 38.085628 42.778382
Himachal Pradesh 1548366.0 4119162.0 4133326.0 10436879.0 14.835527 39.467373 39.603084
Uttrakhand 1845972.0 7642865.0 5587975.0 16198529.0 11.395924 47.182463 34.496805
Assam 5326697.0 4033091.0 9307109.0 19809800.0 26.889201 20.359070 46.982347
Jharkhand 5248354.0 6241471.0 8133341.0 21710718.0 24.174023 28.748340 37.462331
Chhatisgarh 6400817.0 8238886.0 7588778.0 23498180.0 27.239629 35.061805 32.295173
Odisha 9009306.0 8989693.0 12256258.0 32197092.0 27.981738 27.920823 38.066351
Punjab 9296070.0 7904914.0 16717805.0 36801089.0 25.260312 21.480109 45.427474
Bihar 8019997.0 5984896.0 22179969.0 37391988.0 21.448437 16.005825 59.317437
Haryana 8040424.0 12561411.0 19226568.0 43746207.0 18.379705 28.714286 43.950252
Madhya Pradesh 17854020.0 10044889.0 18117360.0 48198169.0 37.042942 20.840810 37.589312
Telangana 9133354.0 9924001.0 28471410.0 51117765.0 17.867280 19.413996 55.697682
Kerala 6489442.0 12070040.0 29673778.0 52600230.0 12.337288 22.946744 56.413780
Andhra Pradesh 16303716.0 10488884.0 22032942.0 52646842.0 30.968080 19.923102 41.850453
Rajasthan 19113780.0 13028794.0 26015812.0 61219447.0 31.221746 21.282116 42.495993
Gujarat 15887187.0 33023538.0 30220377.0 89502727.0 17.750506 36.896684 33.764756
Karnataka 12066304.0 20484404.0 50490630.0 92178806.0 13.090107 22.222466 54.774663
Uttar Pradesh 25999255.0 25548724.0 45968959.0 104337115.0 24.918511 24.486707 44.058108
Tamil Nadu 13329774.0 32841892.0 53343788.0 109256373.0 12.200454 30.059475 48.824418
Maharashtra 21758383.0 47445207.0 88631076.0 179212165.0 12.141131 26.474323 49.455948
In [90]:
plt.figure(figsize=(12,10), dpi =600)

bars1 = merged['primary_percentage']
bars2 = merged['secondary_percentage']
bars3 = merged['tertiary_percentage']
 
legends = ['Primary %', 'Secondary %', 'Tertiary %']

bars = np.add(bars1, bars2).tolist()
 
r = np.arange(0,len(merged.index))
 
names = merged.index
barWidth = 1
 
# Create red bars
plt.bar(r, bars1, color='orange', edgecolor='white')
# Create green bars (middle), on top of the firs ones
plt.bar(r, bars2, bottom=bars1, color='purple', edgecolor='white')
# Create blue bars (top)
plt.bar(r, bars3, bottom=bars, color='green', edgecolor='white')
 
plt.xticks(r, names,rotation=90)
plt.xlabel('States',fontsize=12)
plt.ylabel('Percentage contribution to GDP',fontsize=12)
plt.title('Percentage contribution of the Primary, Secondary and Tertiary sectors as a percentage of the total GDP for all the states')

plt.legend(legends)

plt.tight_layout()

4.Categorizing the states into four groups based on the GDP per capita (C1, C2, C3, C4) for the 20th, 50th, 85th and 100th percentile values.¶

C2 - States lying between the 50th and the 85th percentile¶

C3 - States lying between the 20th and the 50th percentile¶

C4 - States lying below 20th percentile¶

In [91]:
# C1 - States lying between the 85th and the 100th percentile

C1 = gdp_per_capita[gdp_per_capita['GDP per capita'] > gdp_per_capita['GDP per capita'].quantile(0.85)]
C1
Out[91]:
GDP per capita
Kerala 154778.0
Haryana 164077.0
Sikkim 240274
Goa 271793
In [92]:
# C2 - States lying between the 50th and the 85th percentile

C2 = gdp_per_capita[(gdp_per_capita['GDP per capita'] > gdp_per_capita['GDP per capita'].quantile(0.50)) & (gdp_per_capita['GDP per capita'] < gdp_per_capita['GDP per capita'].quantile(0.85))]
C2
Out[92]:
GDP per capita
Arunachal Pradesh 112718
Punjab 126606
Telangana 139035
Gujarat 141263.0
Karnataka 145141
Tamil Nadu 146503.0
Himachal Pradesh 147330
Maharashtra 152853
Uttrakhand 153076.0
In [93]:
# C3 - States lying between the 20th and the 50th percentile

C3 = gdp_per_capita[(gdp_per_capita['GDP per capita'] > gdp_per_capita['GDP per capita'].quantile(0.20)) & (gdp_per_capita['GDP per capita'] <= gdp_per_capita['GDP per capita'].quantile(0.50))]
C3
Out[93]:
GDP per capita
Meghalaya 76228.0
Tripura 77358.0
Rajasthan 84837
Chhatisgarh 86860
Nagaland 89607
Mizoram 97687
Andhra Pradesh 104977
In [94]:
# States lying below the 20th percentile

C4 = gdp_per_capita[gdp_per_capita['GDP per capita'] < gdp_per_capita['GDP per capita'].quantile(0.20)]
C4
Out[94]:
GDP per capita
Bihar 33954
Uttar Pradesh 49450
Assam 60621
Jharkhand 62091
Madhya Pradesh 62989
In [95]:
df_final
Out[95]:
S.No. Item Andhra Pradesh Arunachal Pradesh Assam Bihar Chhatisgarh Goa Gujarat Haryana ... Nagaland Odisha Punjab Rajasthan Sikkim Tamil Nadu Telangana Tripura Uttrakhand Uttar Pradesh
0 1 Agriculture, forestry and fishing 14819416 686117 3855548 7951890 3948847 308507 13769969.00 8015238.0 ... 607897 6422978 9285716 15044394 137447 13064238.0 7591501 799825.0 1601423.0 25097754
1 1.1 Crops 7893514 415520 2890544 4688237 2613371 140421 9671086.00 4636731.0 ... 375825 4382636 5690972 7673441 114976 7297820.0 4162493 397591.0 866146.0 16215212
2 1.2 Livestock 4309078 38387 173478 2060296 352208 30141 2698910.00 2916173.0 ... 123800 788243 2638842 5356257 17338 4693361.0 2951299 88176.0 391188.0 7096876
3 1.3 Forestry and logging 346160 224017 261987 550132 597785 15744 761616.00 352254.0 ... 99802 791463 848245 1956660 4529 392705.0 210741 145096.0 339293.0 1404936
4 1.4 Fishing and aquaculture 2270664 8193 529539 653224 385483 122201 638357.00 110080.0 ... 8470 460636 107657 58036 604 680352.0 266968 168961.0 4796.0 380730
5 2 Mining and quarrying 1484300 30842 1471149 68107 2451970 3622 2117218.00 25186.0 ... 8280 2586328 10354 4069385 1329 265536.0 1541853 142391.0 244549.0 901501
6 Total Primary 16303716 716959 5326697 8019997 6400817 312129 15887187.00 8040424.0 ... 616178 9009306 9296070 19113780 138776 13329774.0 9133354 942216.0 1845972.0 25999255
7 3 Manufacturing 4672266 26120 2002936 2189965 4370593 1177608 24087538.00 7756921.0 ... 18346 5754229 4790341 6552580 550697 18914794.0 6353711 228625.0 5866252.0 12261649
8 4 Electricity, gas, water supply & other utility... 1151729 113527 296587 345168 1198438 204110 3409983.00 1101919.0 ... 37944 833067 911611 1122888 212499 1710379.0 716266 77870.0 433880.0 2030625
9 5 Construction 4664889 147842 1733568 3449763 2669855 165819 5526017.00 3702571.0 ... 156072 2402396 2202962 5353326 82058 12216718.0 2854024 177899.0 1342733.0 11256450
10 Total Secondary 10488884 287489 4033091 5984896 8238886 1547536 33023538.00 12561411.0 ... 212361 8989693 7904914 13028794 845253 32841892.0 9924001 484393.0 7642865.0 25548724
11 6 Trade, repair, hotels and restaurants 4233400 60421 2987155 7448373 1535571 380927 10178713.00 4986319.0 ... 140781 3149555 4419919 7297290 70568 12895842.0 6494607 390423.0 1743106.0 9437243
12 6.1 Trade & repair services 3716000 56796 2876251 7081391 1414164 343492 10178713.00 4817784.0 ... 134174 2886789 4201252 6942748 64624 11252588.0 5724128 390423.0 1534073.0 8476139
13 6.2 Hotels & restaurants 517400 3625 110904 366982 121407 37434 NaN 168535.0 ... 6607 262766 218667 354543 5945 1643253.0 770479 NaN 209033.0 961104
14 7 Transport, storage, communication & services r... 5076984 35203 1194568 3147173 871770 189656 4555910.00 2560623.0 ... 77521 2034016 1951809 3814461 47347 7188320.0 3604741 155956.0 1066693.0 7404509
15 7.1 Railways 424228 59 252509 462413 159176 15649 511593.00 423873.0 ... 336 341494 233389 464638 0 468553.0 199686 305.0 21295.0 1618742
16 7.2 Road transport 2816000 15467 507668 1572288 386628 46171 NaN 1452364.0 ... 34548 973144 928575 2121206 35283 3660994.0 2055658 NaN NaN 3645747
17 7.3 Water transport 94200 0 4502 2228 0 17820 NaN NaN ... 600 50349 0 0 0 70414.0 0 NaN NaN 681
18 7.4 Air transport 14900 0 26223 13599 9507 46359 NaN NaN ... 4153 15354 4473 13469 0 180836.0 120691 NaN 3889.0 36582
19 7.5 Services incidental to transport 780200 109 35739 166600 5232 19272 NaN 190269.0 ... 0 117469 48124 47609 0 NaN 454909 NaN -76.0 16323
20 7.6 Storage 18700 0 10308 10618 16675 357 57634.00 14459.0 ... 89 22675 76429 16584 0 39834.0 19805 254.0 660.0 171696
21 7.7 Communication & services related to broadcasting 928756 19568 357619 919427 294552 44028 1242520.00 479658.0 ... 37794 513531 660819 1150955 12064 1903283.0 753992 66676.0 733778.0 1914737
22 8 Financial services 1900863 25207 543651 1178022 739057 233618 4606644.00 1671486.0 ... 60393 1065147 2057520 1827413 21079 5598498.0 3023729 86094.0 385030.0 3392275
23 9 Real estate, ownership of dwelling & professio... 4405409 48418 1412466 3740641 2462166 407099 5179502.00 6970183.0 ... 159651 2348714 3142786 6451997 75330 16830213.0 9478839 190704.0 831307.0 14548185
24 10 Public administration 2200897 243867 1373611 2078171 867982 346486 2576195.00 1036377.0 ... 295424 1318221 1842730 2460364 119514 3400800.0 1711265 338244.0 579409.0 6152124
25 11 Other services 4215389 218728 1795658 4587589 1112232 180431 3123413.00 2001581.0 ... 259186 2340603 3303041 4164287 149265 7430115.0 4158229 323287.0 982430.0 5034623
26 Total Tertiary 22032942 631844 9307109 22179969 7588778 1738217 30220377.00 19226568.0 ... 992956 12256258 16717805 26015812 483103 53343788.0 28471410 1484709.0 5587975.0 45968959
27 12 TOTAL GSVA at basic prices 48825542 1636292 18666897 36184863 22228481 3597882 79131102.00 39828404.0 ... 1821495 30255256 33918789 58158386 1467133 99515453.0 47528765 2911319.0 15076812.0 97516938
28 13 Taxes on Products 5512100 70099 1725309 3213546 2601791 527279 12353171.04 4985670.0 ... 57674 3151184 3794100 5394503 72200 12507325.0 4425700 149345.0 1434856.0 10107396
29 14 Subsidies on products 1690800 30272 582406 2006421 1332092 61854 1981546.00 1067867.0 ... 37745 1209349 911800 2333442 18400 2766405.0 836700 94002.0 313139.0 3287219
30 15 Gross State Domestic Product 52646842 1676119 19809800 37391988 23498180 4063307 89502727.00 43746207.0 ... 1841424 32197092 36801089 61219447 1520933 109256373.0 51117765 2966662.0 16198529.0 104337115
31 16 Population ('00) 501510 14870 326780 1101240 270530 14950 633590.00 266620.0 ... 20550 435220 290673 721610 6330 745760.0 367660 38350.0 105820.0 2109940
32 17 Per Capita GSDP (Rs.) 104977 112718 60621 33954 86860 271793 141263.00 164077.0 ... 89607 73979 126606 84837 240274 146503.0 139035 77358.0 153076.0 49450

33 rows × 28 columns

Creating dataframe for C1, C2, C3 and C4 states¶¶

In [96]:
C1_df = df_final[['S.No.','Item']+list(states for states in C1.index)]
C2_df = df_final[['S.No.','Item']+list(states for states in C2.index)]
C3_df = df_final[['S.No.','Item']+list(states for states in C3.index)]
C4_df = df_final[['S.No.','Item']+list(states for states in C4.index)]
In [97]:
C1_df = C1_df.iloc[[0,5,7,8,9,11,14,22,23,30]]
C2_df = C2_df.iloc[[0,5,7,8,9,11,14,22,23,30]]
C3_df = C3_df.iloc[[0,5,7,8,9,11,14,22,23,30]]
C4_df = C4_df.iloc[[0,5,7,8,9,11,14,22,23,30]]
In [98]:
C1_df.reset_index(drop=True, inplace=True)
C2_df.reset_index(drop=True, inplace=True)
C3_df.reset_index(drop=True, inplace=True)
C4_df.reset_index(drop=True, inplace=True)

C1_df
Out[98]:
S.No. Item Kerala Haryana Sikkim Goa
0 1 Agriculture, forestry and fishing 5930617.0 8015238.0 137447 308507
1 2 Mining and quarrying 558824.0 25186.0 1329 3622
2 3 Manufacturing 4273567.0 7756921.0 550697 1177608
3 4 Electricity, gas, water supply & other utility... 482470.0 1101919.0 212499 204110
4 5 Construction 7314003.0 3702571.0 82058 165819
5 6 Trade, repair, hotels and restaurants 8557345.0 4986319.0 70568 380927
6 7 Transport, storage, communication & services r... 4020934.0 2560623.0 47347 189656
7 8 Financial services 2010306.0 1671486.0 21079 233618
8 9 Real estate, ownership of dwelling & professio... 7287633.0 6970183.0 75330 407099
9 15 Gross State Domestic Product 52600230.0 43746207.0 1520933 4063307

C-1 States¶

In [100]:
# Creating a new column for Total values for all the sub-sectors for all the states and the column for the percentage contribution
# to the total GSDP by each of the sub-sectors for all the states

C1_df['Total for all states'] = C1_df['Kerala']+C1_df['Haryana']+C1_df['Sikkim']+C1_df['Goa']
C1_df['Percentage of Total GDP'] = C1_df['Total for all states']/C1_df['Total for all states'] [9] * 100
C1_df
Out[100]:
S.No. Item Kerala Haryana Sikkim Goa Total for all states Percentage of Total GDP
0 1 Agriculture, forestry and fishing 5930617.0 8015238.0 137447 308507 14391809.0 14.119213
1 2 Mining and quarrying 558824.0 25186.0 1329 3622 588961.0 0.577805
2 3 Manufacturing 4273567.0 7756921.0 550697 1177608 13758793.0 13.498187
3 4 Electricity, gas, water supply & other utility... 482470.0 1101919.0 212499 204110 2000998.0 1.963097
4 5 Construction 7314003.0 3702571.0 82058 165819 11264451.0 11.051090
5 6 Trade, repair, hotels and restaurants 8557345.0 4986319.0 70568 380927 13995159.0 13.730076
6 7 Transport, storage, communication & services r... 4020934.0 2560623.0 47347 189656 6818560.0 6.689409
7 8 Financial services 2010306.0 1671486.0 21079 233618 3936489.0 3.861928
8 9 Real estate, ownership of dwelling & professio... 7287633.0 6970183.0 75330 407099 14740245.0 14.461049
9 15 Gross State Domestic Product 52600230.0 43746207.0 1520933 4063307 101930677.0 100.000000

Identifying the major sub-sectors contributing more to the GSDP based on the cumulative sum¶

In [102]:
C1_contributor = C1_df[['Item','Percentage of Total GDP']][:-1].sort_values(by='Percentage of Total GDP', ascending=False)
C1_contributor.reset_index(drop=True, inplace=True)
C1_contributor['Cumulative sum'] = C1_contributor['Percentage of Total GDP'].cumsum()
C1_contributor
Out[102]:
Item Percentage of Total GDP Cumulative sum
0 Real estate, ownership of dwelling & professio... 14.461049 14.461049
1 Agriculture, forestry and fishing 14.119213 28.580261
2 Trade, repair, hotels and restaurants 13.730076 42.310337
3 Manufacturing 13.498187 55.808524
4 Construction 11.051090 66.859614
5 Transport, storage, communication & services r... 6.689409 73.549023
6 Financial services 3.861928 77.410951
7 Electricity, gas, water supply & other utility... 1.963097 79.374048
8 Mining and quarrying 0.577805 79.951853
In [103]:
plt.figure(figsize=(6,4), dpi=600)
sns.barplot(y=C1_contributor['Item'], x = C1_contributor['Percentage of Total GDP'], palette='hot')
plt.xlabel("Percentage of Total GSDP for C1 States")
plt.ylabel('Sub-sectors')
plt.title('Percentage of Total GSDP for C1 States vs Sub-sectors')
plt.savefig("Percentage of Total GSDP for C1 States vs Sub-sectors.png", bbox_inches='tight', dpi=600)

plt.show()

C-2 States¶

In [104]:
C2_df
Out[104]:
S.No. Item Arunachal Pradesh Punjab Telangana Gujarat Karnataka Tamil Nadu Himachal Pradesh Maharashtra Uttrakhand
0 1 Agriculture, forestry and fishing 686117 9285716 7591501 13769969.0 11219422 13064238.0 1514981 16475655 1601423.0
1 2 Mining and quarrying 30842 10354 1541853 2117218.0 846882 265536.0 33385 5282727 244549.0
2 3 Manufacturing 26120 4790341 6353711 24087538.0 12953843 18914794.0 2543637 33660294 5866252.0
3 4 Electricity, gas, water supply & other utility... 113527 911611 716266 3409983.0 1425762 1710379.0 767268 4334702 433880.0
4 5 Construction 147842 2202962 2854024 5526017.0 6104799 12216718.0 808256 9450211 1342733.0
5 6 Trade, repair, hotels and restaurants 60421 4419919 6494607 10178713.0 8991658 12895842.0 615496 15839100 1743106.0
6 7 Transport, storage, communication & services r... 35203 1951809 3604741 4555910.0 5097652 7188320.0 552234 9697246 1066693.0
7 8 Financial services 25207 2057520 3023729 4606644.0 4094169 5598498.0 362521 16143324 385030.0
8 9 Real estate, ownership of dwelling & professio... 48418 3142786 9478839 5179502.0 24766393 16830213.0 1125937 30718051 831307.0
9 15 Gross State Domestic Product 1676119 36801089 51117765 89502727.0 92178806 109256373.0 10436879 179212165 16198529.0
In [105]:
C2_df['Total for all states']=list(C2_df[list(states for states in C2_df.columns)[2:]].sum(axis=1))
C2_df['Percentage of Total GDP'] = C2_df['Total for all states']/C2_df['Total for all states'][9] * 100
C2_contributor = C2_df[['Item','Percentage of Total GDP']][:-2].sort_values(by='Percentage of Total GDP', ascending=False)
C2_contributor.reset_index(drop=True, inplace=True)
C2_contributor['Cumulative sum'] = C2_contributor['Percentage of Total GDP'].cumsum()
C2_contributor
Out[105]:
Item Percentage of Total GDP Cumulative sum
0 Manufacturing 18.622130 18.622130
1 Agriculture, forestry and fishing 12.825977 31.448107
2 Trade, repair, hotels and restaurants 10.443537 41.891644
3 Construction 6.932967 48.824611
4 Financial services 6.189947 55.014559
5 Transport, storage, communication & services r... 5.755616 60.770175
6 Electricity, gas, water supply & other utility... 2.357408 63.127583
7 Mining and quarrying 1.769047 64.896630
In [106]:
plt.figure(figsize=(6,4), dpi=600)
sns.barplot(y=C2_contributor['Item'], x = C2_contributor['Percentage of Total GDP'],palette='winter')
plt.xlabel("Percentage of Total GSDP for C2 States")
plt.ylabel('Sub-sectors')
plt.title('Percentage of Total GSDP for C2 States vs Sub-sectors')
plt.show()

C-3 States¶

In [107]:
C3_df['Total for all states']=list(C3_df[list(states for states in C3_df.columns)[2:]].sum(axis=1))
C3_df['Percentage of Total GDP'] = C3_df['Total for all states']/C3_df['Total for all states'][9] * 100
C3_contributor = C3_df[['Item','Percentage of Total GDP']][:-2].sort_values(by='Percentage of Total GDP', ascending=False)
C3_contributor.reset_index(drop=True, inplace=True)
C3_contributor['Cumulative sum'] = C3_contributor['Percentage of Total GDP'].cumsum()
C3_contributor
Out[107]:
Item Percentage of Total GDP Cumulative sum
0 Agriculture, forestry and fishing 24.562038 24.562038
1 Manufacturing 11.171147 35.733185
2 Trade, repair, hotels and restaurants 9.669906 45.403091
3 Construction 9.127797 54.530888
4 Transport, storage, communication & services r... 6.995621 61.526509
5 Mining and quarrying 5.659246 67.185755
6 Financial services 3.225013 70.410769
7 Electricity, gas, water supply & other utility... 2.587450 72.998218
In [108]:
plt.figure(figsize=(6,4), dpi=600)
sns.barplot(y=C3_contributor['Item'], x = C3_contributor['Percentage of Total GDP'], palette='cool')
plt.xlabel("Percentage of Total GSDP for C3 States")
plt.ylabel('Sub-sectors')
plt.title('Percentage of Total GSDP for C3 States vs Sub-sectors')

plt.show()

C-4 States¶

In [109]:
C4_df['Total for all states']=list(C4_df[list(states for states in C4_df.columns)[2:]].sum(axis=1))
C4_df['Percentage of Total GDP'] = C4_df['Total for all states']/C4_df['Total for all states'][9] * 100
C4_contributor = C4_df[['Item','Percentage of Total GDP']][:-2].sort_values(by='Percentage of Total GDP', ascending=False)
C4_contributor.reset_index(drop=True, inplace=True)
C4_contributor['Cumulative sum'] = C4_contributor['Percentage of Total GDP'].cumsum()
C4_contributor
Out[109]:
Item Percentage of Total GDP Cumulative sum
0 Agriculture, forestry and fishing 24.347566 24.347566
1 Trade, repair, hotels and restaurants 11.761462 36.109029
2 Manufacturing 10.774582 46.883610
3 Construction 9.778778 56.662389
4 Transport, storage, communication & services r... 6.948467 63.610856
5 Financial services 3.307088 66.917944
6 Mining and quarrying 2.634036 69.551981
7 Electricity, gas, water supply & other utility... 1.850428 71.402408
In [110]:
plt.figure(figsize=(6,4), dpi=600)
sns.barplot(y=C4_contributor['Item'], x = C4_contributor['Percentage of Total GDP'], palette='summer')
plt.xlabel("Percentage of Total GSDP for C4 States")
plt.ylabel('Sub-sectors')
plt.title('Percentage of Total GSDP for C4 States vs Sub-sectors')

plt.show()

Part-II: GDP and Education Dropout Rates¶

In [112]:
Data_B = pd.read_csv('C:\\MBA\\3rd SEM\\applicationofsoftware\\GDP\\part2.csv')
Data_B = Data_B[['Level of Education - State','Primary - 2014-2015.1','Upper Primary - 2014-2015','Secondary - 2014-2015']]
Data_B
Out[112]:
Level of Education - State Primary - 2014-2015.1 Upper Primary - 2014-2015 Secondary - 2014-2015
0 A & N Islands 0.51 1.69 9.870
1 Andhra Pradesh 6.72 5.20 15.710
2 Arunachal Pradesh 10.82 6.71 17.110
3 Assam 15.36 10.51 27.060
4 Bihar NaN 4.08 25.900
5 Chandigarh NaN 0.44 NaN
6 Chhatisgarh 2.91 5.85 21.260
7 Dadra & Nagar Haveli 1.47 4.02 16.770
8 Daman & Diu 1.11 3.11 32.270
9 Delhi NaN 0.76 11.810
10 Goa 0.73 0.07 11.150
11 Gujarat 0.89 6.41 25.040
12 Haryana 5.61 5.81 15.890
13 Himachal Pradesh 0.64 0.87 6.070
14 Jammu and Kashmir 6.79 5.44 17.280
15 Jharkhand 5.48 8.99 24.000
16 Karnataka 2.02 3.85 26.180
17 Kerala NaN NaN 12.320
18 Lakshadweep NaN 2.78 6.763
19 Madhya Pradesh 6.59 9.20 24.770
20 Maharashtra 1.26 1.79 12.870
21 Manipur 9.66 4.20 14.380
22 Meghalaya 9.46 6.52 20.520
23 Mizoram 10.10 4.78 21.880
24 Nagaland 5.61 7.92 18.230
25 Odisha 2.86 3.81 29.560
26 Puducherry 0.37 0.56 12.190
27 Punjab 3.05 3.22 8.860
28 Rajasthan 5.02 3.07 13.480
29 Sikkim 2.27 1.57 15.890
30 Tamil Nadu NaN NaN 8.100
31 Telangana 2.08 2.30 15.530
32 Tripura 1.28 1.99 28.420
33 Uttar Pradesh 8.58 2.70 10.220
34 Uttrakhand 4.04 1.19 10.400
35 West Bengal 1.47 4.30 17.800
36 All India 4.13 4.03 17.060

Dropping the rows of data for Union Territories and for which we don't have GDP per-capita available like West Bengal.¶

In [113]:
Data_B =  Data_B.drop([0,5,7,8,9,14,18,26,35,36])
Data_B = Data_B.reset_index(drop = True)
Data_B = Data_B.rename(columns={'Level of Education - State': 'State'})
In [114]:
states_gdp_per_capita = gdp_per_capita.reset_index()
states_gdp_per_capita=states_gdp_per_capita.rename(columns={'index':'State'})
In [115]:
dataB_final = pd.merge(Data_B,states_gdp_per_capita,how='left',on=['State'])
In [116]:
dataB_final = dataB_final.rename(columns={'State':'Level of education - State'})
dataB_final
Out[116]:
Level of education - State Primary - 2014-2015.1 Upper Primary - 2014-2015 Secondary - 2014-2015 GDP per capita
0 Andhra Pradesh 6.72 5.20 15.71 104977
1 Arunachal Pradesh 10.82 6.71 17.11 112718
2 Assam 15.36 10.51 27.06 60621
3 Bihar NaN 4.08 25.90 33954
4 Chhatisgarh 2.91 5.85 21.26 86860
5 Goa 0.73 0.07 11.15 271793
6 Gujarat 0.89 6.41 25.04 141263.0
7 Haryana 5.61 5.81 15.89 164077.0
8 Himachal Pradesh 0.64 0.87 6.07 147330
9 Jharkhand 5.48 8.99 24.00 62091
10 Karnataka 2.02 3.85 26.18 145141
11 Kerala NaN NaN 12.32 154778.0
12 Madhya Pradesh 6.59 9.20 24.77 62989
13 Maharashtra 1.26 1.79 12.87 152853
14 Manipur 9.66 4.20 14.38 NaN
15 Meghalaya 9.46 6.52 20.52 76228.0
16 Mizoram 10.10 4.78 21.88 97687
17 Nagaland 5.61 7.92 18.23 89607
18 Odisha 2.86 3.81 29.56 73979
19 Punjab 3.05 3.22 8.86 126606
20 Rajasthan 5.02 3.07 13.48 84837
21 Sikkim 2.27 1.57 15.89 240274
22 Tamil Nadu NaN NaN 8.10 146503.0
23 Telangana 2.08 2.30 15.53 139035
24 Tripura 1.28 1.99 28.42 77358.0
25 Uttar Pradesh 8.58 2.70 10.22 49450
26 Uttrakhand 4.04 1.19 10.40 153076.0
In [117]:
dataB_final.describe()
Out[117]:
Primary - 2014-2015.1 Upper Primary - 2014-2015 Secondary - 2014-2015
count 24.000000 25.000000 27.000000
mean 5.126667 4.504400 17.807407
std 3.890927 2.781644 6.845367
min 0.640000 0.070000 6.070000
25% 2.065000 2.300000 12.595000
50% 4.530000 4.080000 15.890000
75% 7.185000 6.410000 24.385000
max 15.360000 10.510000 29.560000
In [118]:
dataB_final.dropna()
Out[118]:
Level of education - State Primary - 2014-2015.1 Upper Primary - 2014-2015 Secondary - 2014-2015 GDP per capita
0 Andhra Pradesh 6.72 5.20 15.71 104977
1 Arunachal Pradesh 10.82 6.71 17.11 112718
2 Assam 15.36 10.51 27.06 60621
4 Chhatisgarh 2.91 5.85 21.26 86860
5 Goa 0.73 0.07 11.15 271793
6 Gujarat 0.89 6.41 25.04 141263.0
7 Haryana 5.61 5.81 15.89 164077.0
8 Himachal Pradesh 0.64 0.87 6.07 147330
9 Jharkhand 5.48 8.99 24.00 62091
10 Karnataka 2.02 3.85 26.18 145141
12 Madhya Pradesh 6.59 9.20 24.77 62989
13 Maharashtra 1.26 1.79 12.87 152853
15 Meghalaya 9.46 6.52 20.52 76228.0
16 Mizoram 10.10 4.78 21.88 97687
17 Nagaland 5.61 7.92 18.23 89607
18 Odisha 2.86 3.81 29.56 73979
19 Punjab 3.05 3.22 8.86 126606
20 Rajasthan 5.02 3.07 13.48 84837
21 Sikkim 2.27 1.57 15.89 240274
23 Telangana 2.08 2.30 15.53 139035
24 Tripura 1.28 1.99 28.42 77358.0
25 Uttar Pradesh 8.58 2.70 10.22 49450
26 Uttrakhand 4.04 1.19 10.40 153076.0

The minimum Drop out rate for Secondary is also high at 6%.¶

This means greater number of students are more likely to continue their Primary and Upper primary education but not Secondary education.¶

Primary - 2014-2015¶

In [119]:
%matplotlib inline 
In [120]:
plt.figure(figsize=(8,6), dpi= 600)

sns.scatterplot(y=dataB_final['GDP per capita'],x=dataB_final['Primary - 2014-2015.1'])
plt.xlabel('Primary Drop out rate')
plt.ylabel('Per capita GDP')
plt.title('Per capita GDP vs Primary Drop out rate')
plt.grid(True)
plt.show()

Upper Primary - 2014-2015¶

In [121]:
plt.figure(figsize=(8,6), dpi= 600)

sns.scatterplot(y=dataB_final['GDP per capita'],x=dataB_final['Upper Primary - 2014-2015'])
plt.xlabel('Upper Primary Drop out rate')
plt.ylabel('Per capita GDP')
plt.title('Per capita GDP vs Upper Primary Drop out rate')
plt.grid(True)
plt.show()

Secondary - 2014-2015¶

In [123]:
plt.figure(figsize=(8,6), dpi= 600)

# sns.regplot(y=dataB_final['GDP per capita'], x=dataB_final['Secondary - 2014-2015'])
# plt.xlabel('Secondary Drop out rate')
# plt.ylabel('Per capita GDP')
# plt.title('Per capita GDP vs Secondary Drop out rate')
# plt.grid(True)
# plt.show()

# sns.regplot(y=dataB_final['GDP per capita'], x=dataB_final['Secondary - 2014-2015'])
# plt.xlabel('Secondary Drop out rate')
# plt.ylabel('Per capita GDP')
# plt.show()

print(dataB_final.dtypes)
print(dataB_final['GDP per capita'].unique())
print(dataB_final['Secondary - 2014-2015'].unique())
dataB_final['GDP per capita'] = pd.to_numeric(dataB_final['GDP per capita'], errors='coerce')
dataB_final['Secondary - 2014-2015'] = pd.to_numeric(dataB_final['Secondary - 2014-2015'], errors='coerce')

print(dataB_final[['GDP per capita', 'Secondary - 2014-2015']].isna().sum())
dataB_final = dataB_final.dropna(subset=['GDP per capita', 'Secondary - 2014-2015'])
sns.regplot(y=dataB_final['GDP per capita'], x=dataB_final['Secondary - 2014-2015'])
plt.xlabel('Secondary Drop out rate')
plt.ylabel('Per capita GDP')
plt.show()
Level of education - State     object
Primary - 2014-2015.1         float64
Upper Primary - 2014-2015     float64
Secondary - 2014-2015         float64
GDP per capita                 object
dtype: object
[104977 112718 60621 33954 86860 271793 141263.0 164077.0 147330 62091
 145141 154778.0 62989 152853 nan 76228.0 97687 89607 73979 126606 84837
 240274 146503.0 139035 77358.0 49450 153076.0]
[15.71 17.11 27.06 25.9  21.26 11.15 25.04 15.89  6.07 24.   26.18 12.32
 24.77 12.87 14.38 20.52 21.88 18.23 29.56  8.86 13.48  8.1  15.53 28.42
 10.22 10.4 ]
GDP per capita           1
Secondary - 2014-2015    0
dtype: int64

hypothesis;¶

It is evident that education level dropout rate has a direct correlation with GDP per capita. The states should investigate why the Secondary education dropout level is high and find a solution to this problem. .¶
In [ ]: